# General information

This script does the following:

1) Constructs Revenue and Expenditures data for services by state.

2) Sets the gravity system for state-region services flows and solves it.

## Input files

1. `0-Raw_Data/Fips/state_codes.csv`
2. `0-Raw_Data/Expenditure/SAEXP1_1997_2017_ALL_AREAS_.csv`
3. `1-Intermediate_Processed_Data/WIOD_countries.csv`
4. `0-Raw_Data/SAGDP/SAGDP2N_.csv`
5. `1-Intermediate_Processed_Data/distances.csv`
6. `0-Raw_Data/regions.csv`
7. `0-Raw_Data/sectors.csv`
8. `1-Intermediate_Processed_Data/country_country_step_.csv`

## Output files

1. `1-Intermediate_Processed_Data/state_exp_rev_services.csv`
2. `1-Intermediate_Processed_Data/data_services.csv`
3. `1-Intermediate_Processed_Data/vector_lambda_.csv`
4. `1-Intermediate_Processed_Data//matrix_B_.csv`
5. `1-Intermediate_Processed_Data//vector_solution_.csv`
6. `1-Intermediate_Processed_Data//Xij_matrix_services_.csv`

```{r setup, include=FALSE}
knitr::opts_knit$set(root.dir = normalizePath(".."))
remove(list = ls())
```

# Revenue and Expenditures data for services

## Revenue

```{r}

## Importing data
state_code <- read.csv(paste("0-Raw_Data/Fips/state_codes.csv", sep=""), header = TRUE, sep = ",", dec = ".")
bea_raw <- read.csv(paste("0-Raw_Data/Expenditure/SAEXP1_1997_2017_ALL_AREAS_.csv", sep=""), header = TRUE, sep = ",", dec = ".")
wiod_countries <- read.csv(paste("1-Intermediate_Processed_Data/WIOD_countries.csv", sep=""), header = TRUE, sep = ",", dec = ".")

# State codes   
state_code$state <- gsub(" ", "", state_code$state, fixed = TRUE)

# WIOD
wiod_countries <- wiod_countries %>% filter(sector==13)

# EXPORTS OF US (including exports to itself)
wiod_us_all_exp <- wiod_countries %>%
  dplyr::select(year, importer_country, value_USA) %>%
  group_by(year) %>%
  mutate(val_ = sum(value_USA)) %>% 
  ungroup() %>%
#This is just to drop duplicates easier; because at this point all the countries (for a given year) have the same "val_". We could have also wrote "importer_country == "BEL"", for example.
  filter(importer_country == "USA") %>%
  dplyr::select(year, val_)

# IMPORTS OF US (including imports from itself)
wiod_all_us_imp <- wiod_countries %>%
  filter(importer_country == "USA")
wiod_all_us_imp$val_ <- rowSums(wiod_all_us_imp[, 5:dim(wiod_all_us_imp)[2]])  
wiod_all_us_imp <- wiod_all_us_imp %>% dplyr::select(year, val_)

# Constructs the data of production by state

#  LOOPING OVER STATES  TO GET DATA *** 
all_states <- c()
for (i in state_code$code) {
state <- read.csv(paste("0-Raw_Data/SAGDP/SAGDP2N_",i,"_1997_2018.csv", sep=""), header = TRUE, sep = ",", dec = ".")
state <- state %>% 
  dplyr::select(-TableName, -ComponentName, -Unit, -X2018) %>%
  filter(is.na(Region) == FALSE) %>%
  mutate(code = i)
all_states <- rbind(all_states, state)
}
colnames(all_states) <- c("GeoFIPS", "GeoName", "Region", "IndustryId", "IndustryClassification", "Description", "1997", "1998", "1999", "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "code")  

#  SERVICE SECTORS   ***  
#  	SECTORS USED AND THEIR MAPPING TO WIOD (indexed by ci)

# 13.	(NAICS XX) Construction (c18); 
# 14.	(NAICS 42-45) Wholesale and Retail Trade (c19-c21); 
# 15.	(NAICS 481-488) Transport Services (c23-c26);
# 16.	(NAICS 511ñ518) Information Services (c27); 
# 17.	(NAICS 521ñ525) Finance and Insurance (c28); 
# 18.	(NAICS 531-533)  Real Estate (c29-c30); 
# 19.	(NAICS 61) Education (c32); 
# 20.	(NAICS 621ñ624) Health Care (c33); 
# 21.	(NAICS 721ñ722) Accommodation and Food Services (c22); 
# 22.	(NAICS 493, 541, 55, 561, 562, 711ñ713, 811-814) Other Services (c34).

all_states <- all_states %>%
  dplyr::rename(state = GeoName, naics = IndustryClassification) %>%
  mutate(sector = NA) %>%
  mutate(sector = ifelse(naics == "23", 13, sector)) %>%
  mutate(sector = ifelse(naics == "42" | naics == "43" | naics == "44-45", 14, sector)) %>%
  mutate(sector = ifelse(naics == "481" | naics == "482" | naics == "483" | naics == "484" | naics == "485" | naics == "486" | naics == "487-488, 492", 15, sector)) %>%
  mutate(sector = ifelse(naics == "511" | naics == "512" | naics == "515, 517" | naics == "518, 519", 16, sector)) %>%
  mutate(sector = ifelse(naics == "521-522" | naics == "523" | naics == "524" | naics == "525", 17, sector)) %>%
  mutate(sector = ifelse(naics == "531" | naics == "532-533", 18, sector)) %>%
  mutate(sector = ifelse(naics == "61", 19, sector)) %>%
  mutate(sector = ifelse(naics == "621" | naics == "622" | naics == "623" | naics == "624", 20, sector)) %>%
  mutate(sector = ifelse(naics == "721" | naics == "722", 21, sector)) %>%
  mutate(sector = ifelse(naics == "493" | naics == "54" | naics == "55" | naics == "561" | naics == "562" | naics == "711-712" | naics == "713" | naics == "81", 22, sector)) %>%
  filter(is.na(sector) == FALSE) %>%
  mutate(sector = as.numeric(sector))

#  SETTING FINAL DATA   ***  
# Sum over all services' sub-sectors for each year for each state.
all_states <- melt(all_states, id.vars=c("GeoFIPS", "state", "Region", "IndustryId", "naics", "Description", "code", "sector"), variable.name="year", value.name = "revenue")
all_states <- all_states %>%
  mutate(revenue = as.numeric(revenue)) %>%
  mutate(revenue = ifelse(is.na(revenue) == TRUE, 0, revenue)) %>%
  filter(sector >= 13) %>%
  group_by(state, code, year) %>%
  mutate(revenue = sum(revenue)) %>%
  ungroup() %>%
  distinct(state, code, year, .keep_all = TRUE) %>%
  dplyr::select(state, code, year, revenue) %>%
  arrange(state, code)
all_states$state <- gsub(" ", "", all_states$state, fixed = TRUE)

```

## Expenditure in services (by state by year).

```{r}

# IMPORTING BEA DATA
# KEEPING AND RENAMING RELEVANT VARIABLES

# making it a number instead of string
bea_raw$GeoFIPS <- as.numeric(bea_raw$GeoFIPS)
# dropping regions (leaving only states)
# dropping DC
# keeping relevant variables only
bea_raw <- bea_raw %>%
  filter(GeoFIPS <= 90000 & GeoFIPS != 0) %>%
  filter(GeoName != "District of Columbia") %>%
  dplyr::select(-GeoFIPS, -Region, -TableName, -ComponentName, -Unit, -IndustryClassification) %>%
  dplyr::rename(state = GeoName)
colnames(bea_raw) <- c("state", "line", "description", "1997", "1998", "1999", "2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017")

# KEEPING RELEVANT LINES
# keeping services only
bea_raw <- bea_raw %>%
  filter(line == 13) %>%
  dplyr::select(-line, -description) %>%
  arrange(state)
bea_raw$state <- gsub(" ", "", bea_raw$state, fixed = TRUE)
bea_raw <- melt(bea_raw, id.vars=c("state"), variable.name="year", value.name = "expen")
```

## Merging revenue with expenditure + proportionality (state level)

```{r}
bea_raw <- bea_raw %>%
  left_join(all_states, by=c('year'='year', 'state'='state')) %>%
  arrange(state, code, year, expen, revenue) %>%
  dplyr::select(state, code, year, expen, revenue)

#	Proportionality with WIOD (expenditure by states sums up to US imports; revenue by states sums up to US exports).
bea_raw <- bea_raw %>%
  mutate(year = as.numeric(as.character(year))) %>%
  group_by(year) %>%
  mutate(tot_expen = sum(expen)) %>%
  mutate(tot_revenue = sum(revenue)) %>%
  ungroup() %>%
  mutate(sh_expen = expen/tot_expen) %>%
  mutate(sh_revenue = revenue/tot_revenue) %>%
  left_join(wiod_all_us_imp, by=c('year'='year')) %>%
  mutate(expen = sh_expen*val_) %>%
  dplyr::select(-val_) %>%
  left_join(wiod_us_all_exp, by=c('year'='year')) %>%
  mutate(revenue = sh_revenue*val_) %>%
  dplyr::select(-val_) %>%
  dplyr::select(state, year, revenue, expen) %>%
  arrange(year, state)
  
#check that expenditure by states sums up to US imports, and that revenue by states sums up to US exports
check <- bea_raw %>%
  group_by(year) %>%
  mutate(tot_expen = sum(expen)) %>%
  mutate(tot_revenue = sum(revenue)) %>%
  ungroup() %>%
  distinct(year, .keep_all = TRUE) %>%
  dplyr::select(year, tot_expen, tot_revenue) %>%
  #Years with missing values are dropped.
  filter(year >= 2000 & year <= 2011)
round(sum(check$tot_expen - wiod_all_us_imp$val_), 5) == 0
round(sum(check$tot_revenue - wiod_us_all_exp$val_), 5) == 0

write.table(bea_raw, file = paste("1-Intermediate_Processed_Data/state_exp_rev_services.csv", sep=""), sep = ",", row.names = FALSE)
state_exp_rev_services <- read.csv(paste("1-Intermediate_Processed_Data/state_exp_rev_services.csv", sep=""), header = TRUE, sep = ",", dec = ".")

```

## Merging with expenditure and revenue at the country level (and include distances).

```{r}

# Distances (d_{ij})
distances <- read.csv(paste("1-Intermediate_Processed_Data/distances.csv", sep=""), header = TRUE, sep = ",", dec = ".")

# Services expenditure
expen <- wiod_countries %>%
  filter(importer_country != "USA") %>%
  dplyr::rename(country = importer_country)
expen$expen <- rowSums(expen[, 5:dim(expen)[2]])
expen <- expen %>% dplyr::select(country, year, expen)

# Services production
country_rev_exp_services <- melt(wiod_countries, id.vars=c("year", "importer_country", "sector", "importer"), variable.name="country", value.name = "revenue")
country_rev_exp_services <- country_rev_exp_services %>%
  mutate(country = substr(x=country, start=7, stop=9)) %>%
  filter(country != "USA") %>%
  group_by(year, country) %>%
  mutate(revenue = sum(revenue)) %>%
  ungroup() %>%
  distinct(year, country, .keep_all = TRUE) %>%
  dplyr::select(country, year, revenue) %>%
  left_join(expen, by=c('year'='year', 'country'='country'))

#  EXPENDITURE AND REVENUE   ***  
# STATES
revenue1 <- state_exp_rev_services %>% 
  filter(year >= 2000) %>%
  dplyr::rename(iso_o = state, R_i = revenue) %>%
  dplyr::select(iso_o, R_i, year)
expen1 <- state_exp_rev_services %>% 
  filter(year >= 2000) %>%
  dplyr::rename(iso_d = state, E_j = expen) %>%
  dplyr::select(iso_d, E_j, year)
# COUNTRIES
revenue2 <- country_rev_exp_services %>% 
  dplyr::rename(iso_o = country, R_i = revenue) %>%
  dplyr::select(iso_o, R_i, year)
expen2 <- country_rev_exp_services %>% 
  dplyr::rename(iso_d = country, E_j = expen) %>%
  dplyr::select(iso_d, E_j, year)

revenue <- rbind(revenue1, revenue2)
expen <- rbind(expen1, expen2)

# MERGING EXPENDITURE AND REVENUE (AND DISTANCES).
final <- distances %>%
  left_join(revenue, by=c('year'='year', 'iso_o'='iso_o')) %>%
  left_join(expen, by=c('year'='year', 'iso_d'='iso_d')) %>%
  arrange(year, iso_o, iso_d) %>%
  dplyr::select(year, iso_o, iso_d, dist, R_i, E_j) %>%
  mutate(iso_o =  gsub(" ", "", iso_o, fixed = TRUE)) %>%
  mutate(iso_d =  gsub(" ", "", iso_d, fixed = TRUE))


write.table(final, file = paste("1-Intermediate_Processed_Data/data_services.csv", sep=""), sep = ",", row.names = FALSE)

```


# Gravity system for services

## Theory

Start with the standard gravity equation: 

$$X_{ij}=\left(\frac{w_{i}\tau_{ij}}{P_{j}}\right)^{-\varepsilon}E_{j} \;\; (1)$$

where $P_{j}^{-\varepsilon}=\sum_{i}\left(w_{i}\tau_{ij}\right)^{-\varepsilon}$.
Consider
$$\Pi_{i}^{-\varepsilon}= \sum_{j} \tau_{ij}^{ -\varepsilon}P_{j}^{\varepsilon}E_{j} \;\; (2)$$
Since $\sum_{j}X_{ij}=R_{i}$, (1) implies $\sum_{j}\left(\frac{w_{i}\tau_{ij}}{P_{j}} \right)^{-\varepsilon}E_{j}=R_{i}.$ Then, from (2), we conclude $w_{i}^{-\varepsilon} \Pi_{i}^{-\varepsilon}=R{i}$. Hence we have:

$$X_{ij}=\frac{\tau_{ij}^{-\varepsilon}}{\left(\Pi_{i}P_{j}\right)^{-\varepsilon}}R_{i}E_{j}=\tilde{\tau}_{ij}\tilde{\Pi}_{i}^{-1}\tilde{P}_{j}^{-1}R_{i}E_{j}  \;\; (3)$$

where $\tilde{P}_{j}\equiv P_{j}^{-\varepsilon}$ and $\tilde{\Pi}_{i}\equiv \Pi_{i}^{-\varepsilon}$, and $\tilde{\tau}_{ij}\equiv\tau_{ij}^{-\varepsilon}$.
We want to use (3) to calculate services bilateral flows for states, so we need to calculate  $\tilde{\Pi}_{i}^{-1}$ and $\tilde{P}_{j}$. To do that, first note that if we substitute $w_{i}^{-\varepsilon} \Pi_{i}^{-\varepsilon}=R{i}$ in $P_{j}^{-\varepsilon}=\sum_{i}\left(w_{i}\tau_{ij}\right)^{-\varepsilon}$, we have

$$\tilde{P}_{j}  =  \sum_{i}\tilde{\tau}_{ij}\tilde{\Pi}_{i}^{-1}R_{i} \;\; (4)$$

Equations (2) and (4) give us a system that, once solved, lets us know $\tilde{\Pi}_{i}^{-1}$ and $\tilde{P}_{j}$.
Specifically, using (2) and (4) we get:
$$
\tilde{P}_{j}  =  \sum_{i}\tilde{\tau}_{ij}\tilde{\Pi}_{i}^{-1}R_{i}\quad j\in US  \;\; (5)
$$
$$
\tilde{\Pi}_{i}  = \sum_{j}\tilde{\tau}_{ij}\tilde{P}_{j}^{-1}E_{j}\quad i\in US 
 \;\; (6)
$$
Now, from WIOD we know $X_{ij}$ for some pairs. Define $S_{j}$ as the set of $i$'s such that we know $X_{ij}$; and define $S^{*}_{i}$ as the set of $j$'s such that we know $X_{ij}$. Letting 

$$X_{S_{j},j}\equiv\sum_{i\in S_{j}}X_{ij}, \quad\lambda_{j}\equiv1-X_{S_{j},j}/E_{j}, \quad X_{i,S_{i}^{*}}=\sum_{j\in S_{i}^{*}}X_{ij}, \quad\lambda_{i}^{*}\equiv1-X_{i,S_{i}^{*}}/R_{i}, $$
we conclude that
$$
\lambda_{j}\tilde{P}_{j} =  \sum_{i\notin S_{j}}\tilde{\tau}_{ij}\tilde{\Pi}_{i}^{-1}R_{i}\quad j\notin US 
\;\; (7)
$$

$$
\lambda_{i}^{*}\tilde{\Pi}_{i}  =  \sum_{j\notin S_{i}^{*}}\tilde{\tau}_{ij}\tilde{P}_{j}^{-1}E_{j}\quad i\notin US
\;\; (8)
$$
Equations (5), (6), (7), and (8) are our full system. We can represent it in matrix notation. Define $P_{s}=(\tilde{P}_{1},...,\tilde{P}_{50})'$ for the states, $P_{c}=(\tilde{P}_{51},...,\tilde{P}_{87})'$ for the countries. Similarly for $\Pi_{s}$, $\Pi_{c}$,$\lambda_{c}$, and $\lambda_{c}^{*}$. Define $\lambda=\left(1_{1\times50},\lambda_{c},1_{1\times50},\lambda_{c}^{*}\right)'$. Define $S=\left(P_{s},P_{c},\Pi_{s},\Pi_{c}\right)'$, and with some abuse of notation $S^{-1}=\left(P_{s}^{-1},P_{c}^{-1},\Pi_{s}^{-1},\Pi_{c}^{-1}\right)'$ with dimensions $(s+c+s+c)\times(1)$. Define $\left(\mathcal{T} R\right)_{ss}$ as follows:

$$ \boldsymbol{\left(\mathcal{T} R\right)_{ss}}=\left(\begin{array}{ccc}
\tilde{\tau}_{s_{1}s_{1}}R_{s_{1}} & \cdots & \tilde{\tau}_{s_{k}s_{1}}R_{s_{k}}\\
\vdots & \ddots & \vdots\\
\tilde{\tau}_{s_{1}s_{k}}R_{s_{1}} & \cdots & \tilde{\tau}_{s_{k}s_{k}}R_{s_{k}}
\end{array}\right), $$

and define , $\left(\mathcal{T} R\right)_{sc} \,, \left(\mathcal{T} R\right)_{cs}$,$\left(\mathcal{T} E\right)_{ss}$,
$\left(\mathcal{T} E\right)_{sc} \,, \left(\mathcal{T} E\right)_{cs}$ analogously. 
The full system can be written as:

$$ \lambda\circ S=\left(\begin{array}{cccc}
0 & 0 & \left(\mathcal{T} R\right)_{ss} & \left(\mathcal{T} R\right)_{sc}\\
0 & 0 & \left(\mathcal{T} R\right)_{cs} & 0\\
\left(\mathcal{T} E\right)_{ss} & \left(\mathcal{T} E\right)_{sc} & 0 & 0\\
\left(\mathcal{T} E\right)_{cs} & 0 & 0 & 0
\end{array}\right)\cdot S^{-1}, $$

(remember that we are searching for states' flows, so we ignore $cc$ pairs). Or in a more compact representation:
$$ \lambda\circ S=B\cdot S^{-1}, $$

where $\circ$ is the element-by-element product and $B$ is the big matrix. Given $\left\{ E_{j}\right\}$  and $\left\{ R_{i}\right\}$  and $\left\{ \tilde{\tau}_{ij}\right\}$ (more on the computation of $\tilde{\tau}_{ij}$ below) ,  we can get $\left\{ \tilde{P}_{j}\right\}$  and $\left\{ \tilde{\Pi}_{i}\right\}$. The solution for $\left\{ \tilde{P}_{j}, \tilde{\Pi}_{i} \right\}$ is unique up to a constant. This indeterminacy requires a normalization. We thus impose $\tilde{P}_{1}=100$. Then we compute $\left\{ X_{ij}\right\}$  from (3) as $$X_{ij}=\tilde{\tau}_{ij}\tilde{\Pi}_{i}^{-1}\tilde{P}_{j}^{-1}R_{i}E_{j}.$$

## Importing data to R

```{r, warning=FALSE}
#COEFFIENTS FROM REGRESSIONS
own_dummy <- 7.357
dist_coeff <- -0.376

# number of countries (n_c) and number of states (n_s)
regions <- read.csv("0-Raw_Data/regions.csv")
regions$region <- as.character(regions$region)
c_names <- regions %>% filter(status=="country", region!="USA")
c_names <- c_names$region

s_names <- regions %>% filter(status=="state") 
s_names <- s_names$region

n_s <- length(s_names)
n_c <- length(c_names)

#WIOD
wiod_base <- c()
for (yr in 2000:2007) {
  wiot <- country.country <- read.csv(paste('1-Intermediate_Processed_Data//country_country_step_', yr, '.csv', sep= ""), header = TRUE, sep = ",", dec = ".")
  wiod_base <- rbind(wiod_base, wiot)
  }
data_gravity_services.base <- read.csv( file="1-Intermediate_Processed_Data//data_services.csv", header=TRUE, sep="," )

#Number of sectors minus three (to ease referencing sector 13: services).
reclass_sectors <- read.csv("0-Raw_Data/sectors.csv")
n_sec <- length(unique(reclass_sectors$final_sector)) -3

#### small (tolerance) value to check between calculations
epsilon<-0.01;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
```

## Creating and solving gravity system for services

```{r inputs, warning=FALSE}
for (yr in 2000:2007) {
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# 1. LOAD DATA
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
wiod <- wiod_base
## path
## WIOD US
us.us.wiod <- wiod %>% 
  filter( year == yr, sector == n_sec + 1 ) %>% 
  filter( importer == 'USA' ) %>% 
  dplyr::select( USA );
us.us.wiod <- as.numeric(us.us.wiod[1])
# US exports
us.exports.wiod <- wiod %>% 
  filter( year == yr, sector == n_sec + 1 ) %>% 
  filter( importer != 'USA' ) %>% 
  dplyr::select( USA );
# US imports
us.imports.wiod <- wiod %>% 
  filter( year == yr, sector == n_sec + 1 ) %>% 
  filter( importer == 'USA' ) %>% 
  dplyr::select( -year, -USA, -sector, -importer)
## WIOD without USA.
wiod <- wiod_base %>% 
  filter( year == yr, sector == n_sec + 1 ) %>% 
  filter( importer != 'USA' ) %>% 
  dplyr::select( -year, -USA, -sector, -importer  ) 
## Gravity data set (distances, revenue, and expenditure)
data_gravity_services <- data_gravity_services.base %>% 
  filter( year == yr ) %>% 
  dplyr::select( -year ) %>% 
  mutate( iso_o = as.character(iso_o), iso_d = as.character(iso_d) ) %>% 
  mutate( len_o = ifelse(nchar(iso_o)<4, 'COUNTRY', 'STATE'), 
          len_d = ifelse(nchar(iso_d)<4, 'COUNTRY', 'STATE') )

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# 2. ID FOR STATES AND COUNTRIES
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# States
id.states <- data_gravity_services %>% 
  filter( len_o == 'STATE' ) %>% 
  dplyr::select( iso_o ) %>% 
  distinct( iso_o ) %>% 
  arrange( iso_o ) %>% 
  mutate( id = row_number() ) %>% 
  dplyr::rename( region=iso_o ) %>% 
  dplyr::select( id, region )
# Countries
id.countries <- data_gravity_services %>% 
  filter( len_o == 'COUNTRY' ) %>% 
  dplyr::select( iso_o ) %>% 
  distinct( iso_o ) %>% 
  arrange( iso_o ) %>% 
  mutate( id = n_s + row_number() ) %>% 
  dplyr::rename( region = iso_o ) %>% 
  dplyr::select( id, region )
# Total
id.total <- rbind( id.states, id.countries )

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# 3. DISTANCES, PRODUCTION AND EXPENDITURE
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##
## 3.1. DISTANCE MATRIX
##
dist.matrix <- data_gravity_services %>% 
  dplyr::select( iso_o, iso_d, dist ) %>% 
  left_join( y = id.total, by=c('iso_o'='region') ) %>% 
  dplyr::rename( id_o = id ) %>% 
  left_join( y = id.total, by=c('iso_d'='region') ) %>% 
  dplyr::rename( id_d = id ) %>% 
  dplyr::select( id_o, id_d, dist ) %>% 
  arrange( id_o, id_d ) %>% 
  spread( key = id_d, value = dist )
dist.matrix <- as.matrix( dist.matrix[,2:(n_c+n_s+1)] )

#Matrix of taus (we refer to it as "phi" from now on). This step eases the calculation of TE and TR matrices.
#we use the coefficients from 1-WIOD_VA_shares_io_shares_distances.RMD
phi <- diag(x = own_dummy, n_c+n_s, n_c+n_s)
phi <- exp(phi)
phi <- phi*dist.matrix^dist_coeff
dist.matrix <- phi

##
## 3.2. PRODUCTION/REVENUE (R_i) VECTOR
##
gdp.vector <- data_gravity_services %>% 
  dplyr::select( iso_o, R_i ) %>% 
  distinct( iso_o, .keep_all=TRUE ) %>% 
  left_join( y = id.total, by=c('iso_o'='region') ) %>% 
  dplyr::select( id, R_i ) %>% 
  arrange( id );
gdp.vector <- as.vector( gdp.vector$R_i )
##
## 3.3. EXPENDITURE (E_j) VECTOR
##
exp.vector <- data_gravity_services %>% 
  dplyr::select( iso_d, E_j ) %>% 
  distinct( iso_d, .keep_all=TRUE ) %>% 
  left_join( y = id.total, by=c('iso_d'='region') ) %>% 
  dplyr::select( id, E_j ) %>% 
  arrange( id )
exp.vector <- as.vector( exp.vector$E_j )

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# 4. VECTOR LAMBDA AND MATRIX B
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

##
## 4.1 Matrices for E and R (repeat gdp and expenditure vectors in n_s+n_c rows). This step eases the calculation of TE and TR matrices.
##

mat.Y <- matrix( data=rep(gdp.vector, (n_s + n_c)), nrow=(n_s + n_c), byrow=TRUE )
mat.X <- matrix( data=rep(exp.vector, (n_s + n_c)), nrow=(n_s + n_c), byrow=TRUE )

##
## 4.2 Sub-matrices
##

##TR sub-matrices
phi.Y.ss <- dist.matrix[(1:n_s), (1:n_s)] * mat.Y[(1:n_s), (1:n_s)]
phi.Y.sc <- dist.matrix[(1:n_s), ((n_s+1):(n_s+n_c))] * mat.Y[(1:n_s), ((n_s+1):(n_s+n_c))]
phi.Y.cs <- dist.matrix[((n_s+1):(n_s+n_c)), (1:n_s)] * mat.Y[((n_s+1):(n_s+n_c)), (1:n_s)]
phi.Y.cc <- dist.matrix[((n_s+1):(n_s+n_c)), ((n_s+1):(n_s+n_c))] * 
  mat.Y[((n_s+1):(n_s+n_c)), ((n_s+1):(n_s+n_c))]
##TE sub-matrices
phi.X.ss <- dist.matrix[(1:n_s), (1:n_s)] * mat.X[(1:n_s), (1:n_s)]
phi.X.sc <- dist.matrix[(1:n_s), ((n_s+1):(n_s+n_c))] * mat.X[(1:n_s), ((n_s+1):(n_s+n_c))]
phi.X.cs <- dist.matrix[((n_s+1):(n_s+n_c)), (1:n_s)] * mat.X[((n_s+1):(n_s+n_c)), (1:n_s)]
phi.X.cc <- dist.matrix[((n_s+1):(n_s+n_c)), ((n_s+1):(n_s+n_c))] * 
  mat.X[((n_s+1):(n_s+n_c)), ((n_s+1):(n_s+n_c))]

##
## 4.3 WIOD matrix
##
wiod.matrix <- as.matrix( wiod )

##
## 4.4 lambda vector
##

## imports and exports
imports.from.countries <- as.matrix( apply(X=wiod.matrix, MARGIN=1, FUN=sum) )
exports.to.countries <- as.matrix( apply(X=wiod.matrix, MARGIN=2, FUN=sum) )
## revenue and expenditure (to calculate shares)
X_c <- as.matrix( exp.vector[(n_s + 1):(n_s + n_c)] )
Y_c <- as.matrix( gdp.vector[(n_s + 1):(n_s + n_c)] )

#Change the positions of RoW and RUS, so that all matrices/vectors have the same order.
temp <- Y_c[31, ]
Y_c[31, ] <- Y_c[32, ]
Y_c[32, ] <- temp

## lambdas
lambda.p <- (X_c-imports.from.countries) / X_c

#Change the positions of RoW and RUS, so that all matrices/vectors have the same order.
temp <- lambda.p[31, ]
lambda.p[31, ] <- lambda.p[32, ]
lambda.p[32, ] <- temp
lambda.pi <- (Y_c- exports.to.countries) / Y_c

## lambda vector
ones.ns <- matrix( data=rep(1, n_s), nrow = n_s, ncol = 1 )
lambda_vector <- rbind( ones.ns, lambda.p, ones.ns, lambda.pi )
##
## 4.5 Big zeros matrices
##
zeros.nc.nc <- matrix( data = rep(0, n_c*n_c), nrow = n_c, ncol = n_c )
zeros.big <- matrix( data = rep(0, (n_s + n_c)*(n_s + n_c)), nrow = (n_s + n_c), ncol = (n_s + n_c) )

##
## 4.6 MATRIX B
##

## Block for system (5) and (7)
block.10 <- rbind( cbind(phi.Y.ss, phi.Y.sc),
                   cbind(phi.Y.cs, zeros.nc.nc) )
## Block for system (6) and (8)
block.11 <- rbind( cbind(phi.X.ss, phi.X.sc),
                   cbind(phi.X.cs, zeros.nc.nc) )
## Relevant matrix
BIG.MAT <- rbind( cbind(zeros.big, block.10),
                  cbind(block.11, zeros.big) )
#Change the positions of RoW and RUS, so that all matrices/vectors have the same order.
temp <- BIG.MAT[81, ]
BIG.MAT[81, ] <- BIG.MAT[82, ]
BIG.MAT[82, ] <- temp
temp <- BIG.MAT[ ,81]
BIG.MAT[ ,81] <- BIG.MAT[ ,82]
BIG.MAT[ ,82] <- temp
temp <- BIG.MAT[168, ]
BIG.MAT[168, ] <- BIG.MAT[169, ]
BIG.MAT[169, ] <- temp
temp <- BIG.MAT[ ,168]
BIG.MAT[ ,168] <- BIG.MAT[ ,169]
BIG.MAT[ ,169] <- temp

## Saving input matrices for the gravity system.
write.csv(BIG.MAT, file= paste('1-Intermediate_Processed_Data//matrix_B_',yr, '.csv', sep= ""), row.names=FALSE, na="" );
write.csv(lambda_vector, file= paste('1-Intermediate_Processed_Data//vector_lambda_',yr, '.csv', sep= ""), row.names=FALSE, na="" );


## Gravity Systems

# Services gravity
BIG.MAT <- as.matrix(read.csv(paste('1-Intermediate_Processed_Data//matrix_B_',yr, '.csv', sep= "")))
lambda_vector <- as.matrix(read.csv(paste('1-Intermediate_Processed_Data//vector_lambda_',yr, '.csv', sep= "")))

# This function solves the equilibrium system using non-linear least squares.
gravity_system <- function(x){
    Bfirst <- ( ((lambda_vector*x) / (BIG.MAT%*%(x^(-1)))) -1)*100
    Bsecond <- x[1] - 100
    B <- as.matrix(rbind(Bfirst, Bsecond))  
}

start <- 100*matrix(1, 2*(n_c + n_s), 1)
if (yr > 2000){start <- as.matrix(sol)}

sol <- lsqnonlin(gravity_system, start)
sol <- as.matrix(sol$x)


write.csv(sol, file= paste('1-Intermediate_Processed_Data//vector_solution_',yr, '.csv', sep= ""), row.names=FALSE, na="" );


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###FROM SOLUTION TO X_ij
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
SOL <- as.matrix(read.csv(paste('1-Intermediate_Processed_Data//vector_solution_',yr, '.csv', sep="")))

#checking if the solver worked.
value <-  lambda_vector*SOL / BIG.MAT %*% SOL^{-1}-1;
if(norm(value, "2")>epsilon)
  stop(paste("Solution does not seem to work for year", yr))

x<- SOL;

##calculating X_ij

### Basic vector
P.s <- as.matrix( x[1:n_s] );
P.c <- as.matrix( x[(n_s+1):(n_s+n_c)] );
Pi.s <- as.matrix( x[(n_s+n_c+1):(2*n_s+n_c)] );
Pi.c <- as.matrix( x[(2*n_s+n_c+1):(2*(n_s+n_c))] );

### Inverted basic
P.s.inv <- P.s^(-1); 
P.c.inv <- P.c^(-1);
Pi.s.inv <- Pi.s^(-1);
Pi.c.inv <- Pi.c^(-1);

#Pi^(-1)*R and P^(-1)*E (and change the positions of RoW and RUS, when necessary, so that all matrices/vectors have the same order.)
temp <- gdp.vector[81]
gdp.vector[81] <- gdp.vector[82]
gdp.vector[82] <- temp
Pi.sq.mat <- matrix( data=rep(c(Pi.s.inv, Pi.c.inv)*gdp.vector, (n_s + n_c)),
                     nrow=(n_s+n_c), ncol=(n_s+n_c), byrow=TRUE );
temp <- exp.vector[81]
exp.vector[81] <- exp.vector[82]
exp.vector[82] <- temp
P.sq.mat <- matrix( data=rep(c(P.s.inv, P.c.inv)*exp.vector, (n_s + n_c)),
                    nrow=(n_s+n_c), ncol=(n_s+n_c), byrow=FALSE );
temp <- P.sq.mat[,81]
P.sq.mat[,81] <- P.sq.mat[,82]
P.sq.mat[,82] <- temp
temp <- Pi.sq.mat[,81]
Pi.sq.mat[,81] <- Pi.sq.mat[,82]
Pi.sq.mat[,82] <- temp

#Bilateral flows.

#Change the positions of RoW and RUS, so that all matrices/vectors have the same order.
temp <- dist.matrix[81,]
dist.matrix[81,] <- dist.matrix[82,]
dist.matrix[82,] <- temp

#X_ij=tau*Pi^(-1)P^(-1)*R*E
Xij.matrix <- Pi.sq.mat * dist.matrix * P.sq.mat;
temp <- Xij.matrix[,81]
Xij.matrix[,81] <- Xij.matrix[,82]
Xij.matrix[,82] <- temp
write.csv(Xij.matrix, file= paste('1-Intermediate_Processed_Data//Xij_matrix_services_', yr, '.csv', sep= ""), row.names=FALSE, na="" );
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###CHECKS for X_ij
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#CHECK: US consumption from US =US consumption from US from WIOD
value1 <- sum(Xij.matrix[(1:n_s), (1:n_s)])
value2 <- wiod_base %>%
  filter(sector == n_sec + 1, year == yr, importer == "USA") %>%
  dplyr::select(-importer, -sector, -year)
value2=value2$USA;
#checking
if(abs(value1/value2-1)>epsilon) 
  stop(paste(" US consumption from US != value from WIOD for", yr, " value=", abs(value1/value2-1)))

#Check: states' exports to countries add to WIOD US exports to countries (sector 13)
value1 <- sum(Xij.matrix[((n_s + 1):(n_s + n_c)), (1:n_s)])
value2 <- wiod_base %>%
  filter(year == yr, sector == n_sec + 1, importer != "USA")
value2 <- sum(value2$USA)
if(abs(value1/value2-1)>epsilon) 
  stop(paste("states' exports to countries != WIOD US exports to countries for", yr, " value=", abs(value1/value2-1)))

#Check: states' imports from countries add to WIOD US imports from countries (sector 13)
value1 <- sum(Xij.matrix[(1:n_s), ((n_s + 1):(n_s + n_c))])
value2 <- wiod_base %>%
  filter(year == yr, sector == n_sec + 1, importer == "USA") %>%
  dplyr::select(-year, -importer, -sector, - USA)
value2 <- sum(value2)
if(abs(value1/value2-1)>epsilon) 
  stop(paste("states' imports from countries != WIOD US imports from countries for", yr, " value=", abs(value1/value2-1)))


#Check: states' exports to states add to WIOD US exports to US (sector 13)
value1 <- sum(Xij.matrix[(1:n_s), (1:n_s)])
value2 <- wiod_base %>%
  filter(year == yr, sector == n_sec + 1, importer=="USA")
value2 <- value2$USA[1]
if(abs(value1/value2-1)>epsilon) 
  stop(paste("states' exports to states != WIOD US exports to US for", yr, " value=", abs(value1/value2-1)))

}
```

